rm(list = ls())

#Install packages for prediction models

# 
# install.packages("readstata13")
# install.packages("glmnet")
# install.packages("stargazer")
# install.packages("texreg")
# install.packages("lm.beta")
#Load the libraries  


library(foreign)
library(readstata13)
library(glmnet)
library(stargazer)
library(texreg)
library(lm.beta)


#get home directory and determine paths
datapath <- "/Users/hanyang/tu2/Data/Clean/" 
outpath <- "/Users/hanyang/tu2/Output/Data" 



setwd(datapath)
data <- read.dta13("TU_place_vs_customer_cov_std.dta")

#Describe the dataset

#Dimensions of the data
dim(data)

#Data types and first few obs
str(data)

#which version? 1) baseline 2) control for any bankruptcy filings place_effect
version<-2

#all covariates under consideration
# note keep the order as in stata
if (version==1){
  X<-model.matrix(~ median_sa + garnish + chp7fee + chp13fee
                + creditlimit_bal + numbr_sqmi + national
                + inc_median + inc_gini + house_own + house_median + own_vehicle
                + edu_bachelor + job_employed + covered_any + hospital_fp
                , data)
}else{
  X<-model.matrix(~ bkrty3_p + median_sa + garnish + chp7fee + chp13fee
                + creditlimit_bal + numbr_sqmi + national 
                + inc_median + inc_gini + house_own + house_median + own_vehicle
                + edu_bachelor + job_employed + covered_any + hospital_fp
                , data)
}
#subtract intercept
X<-X[,-1]



#TU outcome variables
if (version==1){
  tuVar<-cbind("unpdcoly3", "colmed", "colexmed", "ccdqy3", "bkrt7y3", "bkrt13y3", "bkrty3")
}else{  
  tuVar<-cbind("bkrt7y3", "bkrt13y3") 
}

#lasso cross validation
post_lasso <-function(data, vars, X, w, cvn){
  #fit lasso with cross-validation
  set.seed(123)
  cvfit <- cv.glmnet(x=X, y=data[[vars]], weights = w, type.measure="mse", alpha=1, family="gaussian", nfolds = cvn) 
  
  #select variables
  beta <- coef(cvfit, s = "lambda.min")
  var_lasso <- which(beta[2:length(beta)]!=0)
  
  #post lasso
  Xtemp<-X[, var_lasso ]
  f <- lm(data[[vars]] ~ Xtemp, weights = w)
  
  #coef and standard error
  out<-summary(f)
  est <- out$coefficients[,1:2]
  
  
  coef <-matrix(0, nrow=nvar, ncol=2)
  coef[var_lasso, ]<-est[2:(dim(est)[1]),]
  
  #return post-lasso results
  return(coef)
}

##
cvn<-4
w <- as.numeric(unlist(data["numobs"]))

# Loop over TU financial distress measures
nvar <- dim(X)[2]
outVar<-Matrix(0,ncol=0, nrow=nvar, sparse=TRUE)

for(i in 1:length(tuVar)){
  allVar      <- paste(tuVar[i], "fe", sep="_")
  placeVar    <- paste(tuVar[i], "p",  sep="_")
  
  #post Lasso
  lasso_a <-post_lasso(data, allVar,   X, w, cvn)
  lasso_p <-post_lasso(data, placeVar, X, w, cvn)
  
  #combine variables
  lasso <-cbind(lasso_a, lasso_p)

  coef_a <- paste(tuVar[i], "_fe", sep="")
  coef_p <- paste(tuVar[i], "_p", sep="")
  colnames(lasso)<-c(paste(coef_a, "_b_pl" , sep=""),
                     paste(coef_a, "_se_pl", sep=""),
                     paste(coef_p, "_b_pl" , sep=""),
                     paste(coef_p, "_se_pl", sep=""))
  outVar<-cbind(outVar, lasso)
}

#export csv



setwd(outpath)
write.csv(as.matrix(outVar), file = paste("post_lasso_v", version, ".csv",sep=""))

